source("Scripts/00_setup.R")
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: maps
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
##
## Attaching package: 'reshape'
## The following object is masked from 'package:plotly':
##
## rename
## The following object is masked from 'package:data.table':
##
## melt
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:chron':
##
## days, hours, minutes, seconds, years
## The following object is masked from 'package:reshape':
##
## stamp
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday,
## week, yday, year
## The following object is masked from 'package:base':
##
## date
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:lubridate':
##
## here
## The following objects are masked from 'package:reshape':
##
## rename, round_any
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:maps':
##
## ozone
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
##
## Attaching package: 'foreach'
## The following object is masked from 'package:chron':
##
## times
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loading required package: iterators
##
## Attaching package: 'cluster'
## The following object is masked from 'package:maps':
##
## votes.repub
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
Loading data
load(file = paste0(IO$output_data, "users.Rdata"), verbose = TRUE)
## Loading objects:
## users
load(file = paste0(IO$output_data, "cycles.Rdata"), verbose = TRUE)
## Loading objects:
## cycles
Table 1
# average app usage duration
agg_start = aggregate(cycle_start ~ user_id, cycles, min)
colnames(agg_start) = c("user_id", "start_first_cycle")
agg_end = aggregate(cycle_start+cycle_length ~ user_id, cycles, max)
colnames(agg_end) = c("user_id", "end_last_cycle")
agg = merge(agg_start, agg_end)
m = match(users$user_id, agg$user_id)
agg = agg[m,]
agg$app_duration_in_days = as.numeric(agg$end_last_cycle - agg$start_first_cycle)
agg$app_duration_in_years = agg$app_duration_in_days / 365
ju = which(users$BC != "unclear")
jc = which(cycles$BC != "unclear")
# Number of users
df = data.frame(field = "Number of users",original_dataset = nrow(users), filtered_dataset = sum(users$BC != "unclear"))
# Number of cycles
df = rbind(df,
data.frame(field = "Number of cycles",original_dataset = nrow(cycles), filtered_dataset = sum(cycles$BC != "unclear")))
# Number of breast tenderness symptoms
df = rbind(df,
data.frame(field = "Number of breast tenderness symptoms",original_dataset = sum(cycles$n_TB), filtered_dataset = sum(cycles$n_TB[jc])))
# average app usage duration
df = rbind(df,
data.frame(field = "Average app usage duration (in year)",
original_dataset = round(mean(agg$app_duration_in_years),2),
filtered_dataset = round(mean(agg$app_duration_in_years[ju]),2)))
df = rbind(df,
data.frame(field = "Average app usage duration (sd)",
original_dataset = round(sd(agg$app_duration_in_years),2),
filtered_dataset = round(sd(agg$app_duration_in_years[ju]),2)))
# average app number of cycles
df = rbind(df,
data.frame(field = "Average # of cycles per users",
original_dataset = round(mean(users$n_cycles),2),
filtered_dataset = round(mean(users$n_cycles[ju]),2)))
df = rbind(df,
data.frame(field = "Average # of cycles (sd)",
original_dataset = round(sd(users$n_cycles),2),
filtered_dataset = round(sd(users$n_cycles[ju]),2)))
# average number of breast tenderness per users
df = rbind(df,
data.frame(field = "Average # of breast tenderness per users",
original_dataset = round(mean(users$n_tender_breasts),2),
filtered_dataset = round(mean(users$n_tender_breasts[ju]),2)))
df = rbind(df,
data.frame(field = "Median # of breast tenderness",
original_dataset = round(median(users$n_tender_breasts),2),
filtered_dataset = round(median(users$n_tender_breasts[ju]),2)))
df = rbind(df,
data.frame(field = "Average # of breast tenderness (sd)",
original_dataset = round(sd(users$n_tender_breasts),2),
filtered_dataset = round(sd(users$n_tender_breasts[ju]),2)))
# average number of breast tenderness per cycles
df = rbind(df,
data.frame(field = "Average # of breast tenderness per cycle",
original_dataset = round(mean(cycles$n_TB),2),
filtered_dataset = round(mean(cycles$n_TB[jc]),2)))
df = rbind(df,
data.frame(field = "Average # of breast tenderness per cycle (sd) ",
original_dataset = round(sd(cycles$n_TB),2),
filtered_dataset = round(sd(cycles$n_TB[jc]),2)))
# percentage of cycles per BC (original)
table_BC_CLUE_o = table(cycles$birth_control_CLUE)
table_BC_CLUE_o_perc = round(100*table_BC_CLUE_o/nrow(cycles),2)
table_BC_CLUE_f = table(cycles$birth_control_CLUE[jc])
table_BC_CLUE_f_perc = round(100*table_BC_CLUE_f/length(jc),2)
df = rbind(df,
data.frame(field = paste0("% of cycles per BC as logged by the users (",names(table_BC_CLUE_o_perc),")"),
original_dataset = as.numeric(table_BC_CLUE_o_perc),
filtered_dataset = as.numeric(table_BC_CLUE_f_perc)))
# percentage of cycles per BC (re-assigned)
table_BC_o = table(cycles$BC)
table_BC_o_perc = round(100*table_BC_o/nrow(cycles),2)
table_BC_f = table(cycles$BC[jc])
table_BC_f_perc = round(100*table_BC_f/length(jc),2)
df = rbind(df,
data.frame(field = paste0("% of cycles per BC as re-assigned (",names(table_BC_o_perc),")"),
original_dataset = as.numeric(table_BC_o_perc),
filtered_dataset = c(as.numeric(table_BC_f_perc),0)))
df
## field
## 1 Number of users
## 2 Number of cycles
## 3 Number of breast tenderness symptoms
## 4 Average app usage duration (in year)
## 5 Average app usage duration (sd)
## 6 Average # of cycles per users
## 7 Average # of cycles (sd)
## 8 Average # of breast tenderness per users
## 9 Median # of breast tenderness
## 10 Average # of breast tenderness (sd)
## 11 Average # of breast tenderness per cycle
## 12 Average # of breast tenderness per cycle (sd)
## 13 % of cycles per BC as logged by the users (none / condoms)
## 14 % of cycles per BC as logged by the users (pill)
## 15 % of cycles per BC as logged by the users (did not enter)
## 16 % of cycles per BC as re-assigned (none / condoms)
## 17 % of cycles per BC as re-assigned (pill)
## 18 % of cycles per BC as re-assigned (unclear)
## original_dataset filtered_dataset
## 1 216484.00 210529.00
## 2 3686977.00 3056348.00
## 3 4530662.00 4132033.00
## 4 1.54 1.55
## 5 0.65 0.65
## 6 17.03 17.15
## 7 7.45 7.48
## 8 20.93 21.27
## 9 8.00 8.00
## 10 36.06 36.43
## 11 1.23 1.35
## 12 2.94 3.12
## 13 28.42 31.50
## 14 39.08 39.29
## 15 32.50 29.21
## 16 48.94 59.04
## 17 33.95 40.96
## 18 17.10 0.00
write.csv(df, file = paste0(IO$tables,"Table_1.csv") , row.names = FALSE)
Supplementary Table 1
table_n = table(cycles$birth_control_CLUE, cycles$BC)
table_perc = round( 100*table_n/apply(table_n, 1, sum) , 2)
rownames(table_n) = paste0(rownames(table_n), ": n cycles")
table_n
##
## none / condoms pill unclear
## none / condoms: n cycles 895215 67507 85295
## pill: n cycles 215958 984776 240084
## did not enter: n cycles 693365 199527 305250
rownames(table_perc) = paste0(rownames(table_perc), ": % cycles")
table_perc
##
## none / condoms pill unclear
## none / condoms: % cycles 85.42 6.44 8.14
## pill: % cycles 14.99 68.35 16.66
## did not enter: % cycles 57.87 16.65 25.48
table_BC = rbind(table_n,table_perc)
o = order(rownames(table_BC), decreasing = TRUE)
table_BC = table_BC[o,]
table_BC
## none / condoms pill unclear
## pill: n cycles 215958.00 984776.00 240084.00
## pill: % cycles 14.99 68.35 16.66
## none / condoms: n cycles 895215.00 67507.00 85295.00
## none / condoms: % cycles 85.42 6.44 8.14
## did not enter: n cycles 693365.00 199527.00 305250.00
## did not enter: % cycles 57.87 16.65 25.48
write.csv(table_BC, file = paste0(IO$tables, "Supplementary_Table_BC_table.csv"))
Table 2: users’ features
## [1] 20.59862
## [1] 4.946749
## [1] 20.61776
## [1] 4.953888
round(100*table(users$age_cat)/nrow(users),2)
##
## (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45]
## 9.14 51.75 23.20 10.42 4.34 1.05 0.11
round(100*table(users$age_cat[ju])/length(ju),2)
##
## (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45]
## 9.10 51.61 23.29 10.47 4.36 1.06 0.11
## [1] 24
## [1] 24
round(100*sort(table(users$country))/nrow(users),2)
##
## Venezuela Ecuador Ireland Belgium Sweden
## 0.04 0.05 0.05 0.11 0.16
## Peru Switzerland Austria Portugal Russia
## 0.17 0.19 0.24 0.52 0.74
## Denmark Chile Australia Argentina Colombia
## 0.75 0.93 1.56 1.58 1.64
## Italy Spain Canada Germany France
## 2.32 2.84 3.14 7.29 8.01
## Mexico United Kingdom Brazil United States
## 8.37 9.12 20.49 29.71
round(100*sort(table(users$country[ju]))/length(ju),2)
##
## Venezuela Ireland Ecuador Belgium Sweden
## 0.04 0.05 0.05 0.11 0.16
## Peru Switzerland Austria Portugal Russia
## 0.17 0.19 0.24 0.53 0.74
## Denmark Chile Australia Argentina Colombia
## 0.75 0.93 1.53 1.58 1.62
## Italy Spain Canada Germany France
## 2.34 2.85 3.12 7.30 8.01
## Mexico United Kingdom Brazil United States
## 8.42 9.05 20.48 29.74
mean(users$height, na.rm = TRUE)
## [1] 163.4378
sd(users$height, na.rm = TRUE)
## [1] 6.965825
mean(users$height[ju], na.rm = TRUE)
## [1] 163.4301
sd(users$height[ju], na.rm = TRUE)
## [1] 6.964167
mean(users$weight, na.rm = TRUE)
## [1] 61.05458
sd(users$weight, na.rm = TRUE)
## [1] 15.13491
mean(users$weight[ju], na.rm = TRUE)
## [1] 61.02502
sd(users$weight[ju], na.rm = TRUE)
## [1] 15.08613
mean(users$bmi, na.rm = TRUE)
## [1] 22.85278
sd(users$bmi, na.rm = TRUE)
## [1] 5.304415
mean(users$bmi[ju], na.rm = TRUE)
## [1] 22.84302
sd(users$bmi[ju], na.rm = TRUE)
## [1] 5.274114
round(100*sort(table(users$birth_control_o))/nrow(users),2)
##
## condoms none pill did not enter
## 10.12 18.25 35.67 35.96
round(100*sort(table(users$birth_control_o[ju]))/length(ju),2)
##
## condoms none did not enter pill
## 10.31 18.59 35.31 35.79
round(100*sort(table(users$BC))/nrow(users),2)
##
## unclear both pill none / condoms
## 2.75 21.07 26.03 50.15
round(100*sort(table(users$BC[ju]))/length(ju),2)
##
## both pill none / condoms
## 21.67 26.76 51.57
Demographics (Supplementary viz)
g = ggplot(users, aes(x = age, fill = birth_control))
g + geom_histogram(binwidth = 1) + facet_grid(birth_control ~.)

g = ggplot(users, aes(x = age, fill = country))
g + geom_histogram(binwidth = 1) + facet_wrap(country ~.)

load(file = paste0(IO$out_Rdata, "number_of_users_and_cycles_per_category.Rdata"), verbose = TRUE)
## Loading objects:
## agg
g = ggplot(agg, aes(y = BC, x= 0, col = BC)) +
facet_grid(bmi_cat ~ age_cat) +
geom_text(aes(label = n_users)) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank()) +
guides(color = FALSE, size = FALSE) + xlab("") + ylab("") +
ggtitle("Number of users per category of age (horizontal) and bmi (vertical)")
g
## Warning: Removed 16 rows containing missing values (geom_text).

ggsave(filename = paste0(IO$panels, "S1_X_number_of_users_per_cat.pdf"), plot = g)
## Saving 7 x 5 in image
## Warning: Removed 16 rows containing missing values (geom_text).
g = ggplot(agg, aes(y = BC, x= 0, col = BC)) +
facet_grid(bmi_cat ~ age_cat) +
geom_text(aes(label = n_cycles)) +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.ticks = element_blank(),
axis.text.x = element_blank()) +
guides(color = FALSE, size = FALSE) + xlab("") + ylab("") +
ggtitle("Number of cycles per category of age (horizontal) and bmi (vertical)")
g
## Warning: Removed 51 rows containing missing values (geom_text).

ggsave(filename = paste0(IO$panels, "S1_X_number_of_cycles_per_cat.pdf"), plot = g)
## Saving 7 x 5 in image
## Warning: Removed 51 rows containing missing values (geom_text).
Figure 1: Overall tender breasts profiles per age categories and BC for cycles with regular tracking
load(file = paste0(IO$out_Rdata,"TB_overal_pattern_by_age_cat_and_birth_control_for_regular_tracking.Rdata"), verbose = TRUE)
## Loading objects:
## agg
agg$age_x_BC = interaction(agg$age_cat , agg$BC)
agg$SE = agg$fraction_cycles_with_TB_SE
j = which(agg$age_cat %in% par$age_cat_exclude)
if(length(j)>0){agg = agg[-j,]}
j = which((agg$age_cat == "(35,40]") & (agg$BC == "pill"))
if(length(j)>0){agg = agg[-j,]}
agg = agg[which(agg$tracking_group == "regular tracking"),]
g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = age_x_BC, shape = BC)) +
geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
geom_ribbon(aes(ymin = fraction_cycles_with_TB-1.96*SE, ymax = fraction_cycles_with_TB+1.96*SE, fill = age_x_BC), alpha = 0.5, col = NA)+
geom_line() + geom_point()+
scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
scale_colour_manual(values= cols$age_x_BC , name="Age group x BC")+
scale_fill_manual(values= cols$age_x_BC , name="Age group x BC")+
guides(col = FALSE, fill = FALSE, shape = FALSE)+
xlab("cycleday from 1st day of menstruation") + ylab("% cycles with reported TB")+
theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))#+
#facet_grid(. ~ tracking_group)
g

ggsave(filename = paste0(IO$panels, "symptoms_profile_by_BC_and_age_cat_for_regular_tracking.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.5*viz$full_width/2,
scale = viz$scale)
load(file = paste0(IO$out_Rdata, "avg_symptoms_per_user.Rdata"), verbose = TRUE)
## Loading objects:
## agg
g = ggplot(agg[agg$BC %in% c("pill","none / condoms"),], aes(x = n_TB_by_n_cycles, y = perc_users, fill = BC)) +
geom_bar(stat = "identity", position = "dodge" )+
scale_fill_manual(values = cols$BC)+
xlab("# of reported TB symptoms / # of cycles")+
ylab("% of users") + guides(fill = FALSE)
g

ggsave(filename = paste0(IO$panels, "1_avg_TB_per_user.pdf"), plot = g, width = viz$full_width/4.5, height = viz$full_width/4.5, scale = viz$scale)
load(file = paste0(IO$out_Rdata, "n_TB_per_cycle.Rdata"), verbose=TRUE)
## Loading objects:
## agg
g = ggplot(agg[agg$BC != "unclear",], aes(x = tender_breasts_ul, y = perc_cycles, fill = BC)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = cols$BC3) + guides(fill = FALSE)+
xlab("# of reported Tender Breast symptoms per cycle")+ ylab("% of cycles")
g

ggsave(filename = paste0(IO$panels, "1_number_of_TB_per_cycle.pdf"), plot = g, width = viz$full_width/4, height = viz$full_width/4.5, scale = viz$scale)
Figure S2: Predictor of Breast Tenderness.
load(file = paste0(IO$out_Rdata, "symptoms_predictors.Rdata"), verbose=TRUE)
## Loading objects:
## pred
g_z = ggplot(pred[!grepl("country",pred$input),], aes(x = cycleday_m_D, y = z_value, col = input, linetype = tracking_cluster)) +
geom_vline(xintercept = 0, size = 2, col = "gray90")+
geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
geom_hline(yintercept = 0, size = 0.1)+
geom_line() +
scale_x_continuous(breaks = par$x.axis)+
facet_wrap(input ~ .)+
guides(col = FALSE)
g_z

g_z = g_z + guides(linetype = FALSE)
g_p = ggplot(pred[!grepl("country",pred$input),], aes(x = cycleday_m_D, y = -log10(q_value), col = input, linetype = tracking_cluster)) +
geom_vline(xintercept = 0, size = 2, col = "gray90")+
geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
geom_hline(yintercept = 0, size = 0.1)+
geom_line() +
scale_x_continuous(breaks = par$x.axis)+
facet_wrap(input ~ .)+
guides(col = FALSE)
g_p

g_p = g_p + guides(linetype = FALSE)
g_z_countries = ggplot(pred[grep("country",pred$input),], aes(x = cycleday_m_D, y = z_value, col = input, linetype = tracking_cluster)) +
geom_vline(xintercept = 0, size = 2, col = "gray90")+
geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
geom_hline(yintercept = 0, size = 0.1)+
geom_line() +
scale_x_continuous(breaks = par$x.axis)+
facet_wrap(country_name ~ .)+
guides(col = FALSE)
g_z_countries

g_z_countries = g_z_countries + guides(linetype = FALSE)
g_p_countries = ggplot(pred[grep("country",pred$input),], aes(x = cycleday_m_D, y = -log10(q_value), col = input, linetype = tracking_cluster)) +
geom_vline(xintercept = 0, size = 2, col = "gray90")+
geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
geom_hline(yintercept = 0, size = 0.1)+
geom_line() +
scale_x_continuous(breaks = par$x.axis)+
facet_wrap(country_name ~ .)+
guides(col = FALSE)
g_p_countries

g_p_countries = g_p_countries + guides(linetype = FALSE)
ggsave(filename = paste0(IO$panels, "predictors_z.pdf"),
plot = g_z,
width = viz$full_width/2,
height = 0.75*viz$full_width/2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "predictors_z_countries.pdf"),
plot = g_z_countries,
width = viz$full_width/2,
height = 0.75*viz$full_width/2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "predictors_p.pdf"),
plot = g_p,
width = viz$full_width/2,
height = 0.75*viz$full_width/2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "predictors_p_countries.pdf"),
plot = g_p_countries,
width = viz$full_width/2,
height = 0.75*viz$full_width/2,
scale = viz$scale)
Figure 2: Consistency of Symptoms
load(file = paste0(IO$out_Rdata,"consistent_and_inconsistent_users.Rdata"), verbose = TRUE)
## Loading objects:
## sel_d
sel_d_consistent = sel_d[sel_d$consistent == "consistent", ]
sel_d_average = sel_d[sel_d$consistent == "average", ]
sel_d_inconsistent = sel_d[sel_d$consistent == "inconsistent", ]
if((nrow(sel_d_inconsistent)>0)&(nrow(sel_d_consistent)>0)){
g_consistent = ggplot_imputed_TB(sel_d = sel_d_consistent,
facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
g_average = ggplot_imputed_TB(sel_d = sel_d_average,
facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
g_inconsistent = ggplot_imputed_TB(sel_d = sel_d_inconsistent,
facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
grid.arrange(g_inconsistent, g_average, g_consistent, nrow = 1)
}

load(file = paste0(IO$out_Rdata,"consistent_and_inconsistent_users.Rdata"), verbose = TRUE)
## Loading objects:
## sel_d
user_ids =
c("586e33d674b46555254e908ec42d6771ed294bcd", "ff484ef33fd2c29086617ed63a9814571b86f15d",
"677332825213f1f0cb1e6831ead1fc787cf01663", "8714aad3100409673b8762775e474aff17321df4",
"40620830924a2f02935e3a6c6102f84e5de35fce","6ef947aeb9cfe0468bd1efd26b8a10dca0a1e645")
sel_d = sel_d[which(sel_d$user_id %in% user_ids),]
sel_d_consistent = sel_d[sel_d$consistent == "consistent", ]
sel_d_average = sel_d[sel_d$consistent == "average", ]
sel_d_inconsistent = sel_d[sel_d$consistent == "inconsistent", ]
if((nrow(sel_d_inconsistent)>0)&(nrow(sel_d_consistent)>0)&(nrow(sel_d_average)>0)){
g_consistent = ggplot_imputed_TB(sel_d = sel_d_consistent,
facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
g_average = ggplot_imputed_TB(sel_d = sel_d_average,
facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
g_inconsistent = ggplot_imputed_TB(sel_d = sel_d_inconsistent,
facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
grid.arrange(g_inconsistent, g_average, g_consistent, nrow = 1)
g_consistency = arrangeGrob(g_inconsistent, g_average, g_consistent, nrow = 1, widths = c(1,1,1))
g_consistency
ggsave(filename = paste0(IO$panels, "consistency_examples.pdf"),
plot = g_consistency,
width = viz$full_width/2,
height = 0.33*viz$full_width/2,
scale = viz$scale*1.7)
}

load(file = paste0(IO$out_Rdata,"consistency_summary.Rdata"), verbose = TRUE)
## Loading objects:
## df
df$BC = factor(df$BC, levels = c(as.character(par$BC_dict$name),"both"))
df$BC_x_init_TB_group = interaction(df$init_TB_group, df$BC)
g = ggplot(df, aes(x = consistency, y = n_users, fill = BC_x_init_TB_group)) +
geom_area(position = "identity", alpha = 0.75) +
facet_grid(BC ~ . , scale = "free_y")+
scale_fill_manual(values = cols$BC_x_init_TB_group2)+
guides(fill = FALSE)
g

ggsave(filename = paste0(IO$panels, "consistency_summary.pdf"),
plot = g,
width = viz$full_width/2.5,
height = 0.6*viz$full_width/2.5,
scale = viz$scale)
Figure 2: Phenotyping
for(bc in c("NC","pill")){
i = (bc == "pill")+1
BC = par$BC_dict$name[i]
load(file = paste0(IO$tmp_data,"user_phenotyping_",bc,".Rdata"), verbose = TRUE)
# screeplot
g = ggplot(eig_df, aes(x = as.factor(x), y = eig)) + geom_bar(stat = "identity", fill = cols$BC[i])+
xlab("dimensions")+ggtitle(BC)
g
ggsave(filename = paste0(IO$panels, "S_screeplot_",bc,".pdf"),
plot = g,
width = viz$full_width/4,
height = 0.75*viz$full_width/4,
scale = viz$scale)
# interactive 3D
plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
color = ~n_days,
type = "scatter3d", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
color = ~time,
type = "scatter3d", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
color = ~max,
type = "scatter3d", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
color = ~time_last_symptoms,
type = "scatter3d", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
# interactive 2D
plot_ly(mds_df, x = ~X1, y = ~X2,
color = ~X3,
type = "scatter", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
plot_ly(mds_df, x = ~X1, y = ~X3,
color = ~X2,
type = "scatter", mode = "markers",
marker = list(size = 4),
text = ~paste('ID: ', user_id))
if(bc == "pill"){
#mds_df$user_id[grep("65035d",mds_df$user_id)]
user_ids = c("0ceafb77c214fb6e45d069e6c645083a9f7799d0","d0fcb8602f237946a56162f047c8ef3ef254541c",special_pill_user,
"73266275a3d6f3898cf58269070edb4d4edd2034","e610274623760e9c4f1a50e0ed5a00f17117ed0b",
"65035d05a6ccabaaeb82ecd4e17bde1c096adc2f",
"c45cd0e050cf6d704da32cbab899bacf51a519a3")
}else{
user_ids = c("2ac70e8def770369d086e9249fee9d5b3485ad0d","cb2700ea5a33499b1c97113444e1b2e5d0d09162",
"c4bf18fef2f8db48d6f4a881b1a23be849cbbc06","b70dde8d03e0f39bdb4cb67c88b0d26ad6002bd1",
"2eedf35808effb0a40381e11d1c94257096ab8de","cffcd394d8f769d363f67eada7f16b0106c3191a",
"b9590a55307d916d4aabc8e386bc76586a28ae0b")
user_ids = c("2ac70e8def770369d086e9249fee9d5b3485ad0d","cb2700ea5a33499b1c97113444e1b2e5d0d09162",
"cffcd394d8f769d363f67eada7f16b0106c3191a","b70dde8d03e0f39bdb4cb67c88b0d26ad6002bd1",
"c4bf18fef2f8db48d6f4a881b1a23be849cbbc06","34c8890de2da03e29a7f93771ea9ecaf12cf4c88",
"2eedf35808effb0a40381e11d1c94257096ab8de")
}
j_mds = which(mds_df$user_id %in% user_ids)
mds_df_subset = mds_df[j_mds,]
mds_df_subset$user_id = factor(as.character(mds_df_subset$user_id), levels = user_ids)
# MDS
g_12 = ggplot(mds_df, aes(x = X1, y = X2)) + coord_fixed()+
geom_point(alpha = 0.5, size = 0.3, col = "gray40")+
geom_point(data = mds_df_subset, aes(x = X1, y = X2, col = user_id), size = 2, alpha = 0.7)+
scale_x_continuous(breaks = seq(-1,1,by = 0.2))+
scale_y_continuous(breaks = seq(-1,1,by = 0.2))+
guides(col = FALSE)
g_13 = ggplot(mds_df, aes(x = X1, y = X3)) + coord_fixed()+
geom_point(alpha = 0.5, size = 0.3, col = "gray40") +
geom_point(data = mds_df_subset, aes(x = X1, y = X3, col = user_id), size = 2, alpha = 0.7)+
scale_x_continuous(breaks = seq(-1,1,by = 0.2))+
scale_y_continuous(breaks = seq(-1,1,by = 0.2))+
guides(col = FALSE)
grid.arrange(g_12, g_13, nrow = 1)
g_mds = arrangeGrob(g_12, g_13, nrow = 2)
g_mds
ggsave(filename = paste0(IO$panels, "mds_",bc,".pdf"),
plot = g_mds,
width = viz$full_width/4,
height = viz$full_width/2.2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "mds_",bc,"_12.pdf"),
plot = g_12,
width = viz$full_width/4,
height = viz$full_width/4,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "mds_",bc,"_13.pdf"),
plot = g_13,
width = viz$full_width/4,
height = viz$full_width/4,
scale = viz$scale)
avg_profiles = this_avg_profile_per_user[which(this_avg_profile_per_user$user_id %in% user_ids),]
c_wide_subset = this_c_wide[which(this_c_wide$user_id %in% user_ids),]
avg_profiles = melt(c_wide_subset)
avg_profiles$avg_tender_breast = avg_profiles$value
avg_profiles$user_id = factor(as.character(avg_profiles$user_id), levels = user_ids)
g_profiles = ggplot(avg_profiles, aes(x = cycleday_m_D, y = avg_tender_breast, fill = user_id, col = user_id))+
#geom_point()+
#geom_line()+
geom_area(position = "identity", alpha = 0.75)+
facet_grid(user_id ~ .)+
guides(col = FALSE, fill = FALSE)+
theme(strip.text = element_blank())+
scale_y_continuous(breaks = c(0,0.5,1))+
scale_x_continuous(breaks = par$x.axis)+
xlab("cycleday from menstruation")+
ylab("average breast tenderness")
g_profiles
ggsave(filename = paste0(IO$panels, "profiles_",bc,".pdf"),
plot = g_profiles,
width = viz$full_width/4,
height = 0.65*viz$full_width/4,
scale = viz$scale*1.5)
}
## Loading objects:
## mds_df
## eig_df
## rtsne_out_df
## this_avg_profile_per_user
## this_c_wide
## Loading objects:
## mds_df
## eig_df
## rtsne_out_df
## this_avg_profile_per_user
## this_c_wide


Figure 3: PILL TRANSITION
load(file = paste0(IO$out_Rdata,"pill_transition_profiles.Rdata"), verbose = TRUE)
## Loading objects:
## transition_profiles
transition_profiles$TB_change_cat_simple = factor(transition_profiles$TB_change_cat_simple , levels = c("increase","no change","decrease"))
agg = unique(transition_profiles[,c("TB_change_cat_simple","transition","n_users")])
agg$cycle_nb_m_from_trans = -4
agg
## TB_change_cat_simple transition n_users cycle_nb_m_from_trans
## 1 decrease off pill 534 -4
## 206 increase off pill 989 -4
## 414 no change off pill 232 -4
## 622 decrease on pill 4544 -4
## 830 increase on pill 3165 -4
## 1038 no change on pill 2146 -4
agg_sum = aggregate( n_users ~ transition, agg, FUN = sum )
colnames(agg_sum) = c("transition","n_users_tot")
agg = merge(agg, agg_sum, all = TRUE)
agg$perc_users = paste0(round(100*agg$n_users/agg$n_users_tot),"%")
agg$text = paste0(agg$n_users, "\n",agg$perc_users)
g = ggplot(transition_profiles, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = BC)) +
geom_line()+ facet_grid(transition + TB_change_cat_simple ~ cycle_nb_m_from_trans) +
scale_x_continuous(breaks = par$x.axis, limits = c(-par$D,par$Df))+ guides(col = FALSE)+
geom_text(data = agg, aes(x = -par$D, y = 0.25*max(transition_profiles$fraction_cycles_with_TB), col = transition, label = text),
nudge_x = 7, nudge_y = 0, size = 3.5)+
scale_color_manual(values = rep(cols$BC,2))+
scale_y_continuous(labels = scales::percent)+
xlab("cycleday from menstruation")+
ylab("% of cycles with reported breast tenderness")
print(g)

ggsave(filename = paste0(IO$panels, "profiles_pill_transition.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.85*viz$full_width/2,
scale = viz$scale*1.1)
load(file = paste0(IO$out_Rdata,"table_TB_change_cat_x_BC_df.Rdata"), verbose = TRUE)
## Loading objects:
## table_TB_change_cat_x_BC_df
load(file = paste0(IO$out_Rdata,"hist_log2_TB_ratio_x_BC.Rdata"), verbose = TRUE)
## Loading objects:
## hist_log2_TB_ratio_x_BC
g = ggplot(table_TB_change_cat_x_BC_df, aes(x = TB_change_cat_wrap, y = n_users, fill = BC))+
geom_bar(stat = "identity")+
facet_grid(BC ~., scale = "free")+
xlab("")+ylab("# of users")+
scale_fill_manual(values = cols$BC)
g

g_hist = ggplot(hist_log2_TB_ratio_x_BC, aes(x = log2_TB_ratio, y = n_users, fill = transition))+
geom_bar(stat = "identity", width = 0.5)+
facet_grid(transition ~., scale = "free")+
xlab("log2(TB ratio: after/before)")+ylab("# of users")+
#geom_vline(xintercept = c(-value_inf+0.5,-1.5,-0.5,0.5,1.5,value_inf-0.5), col = "gray20", linetype = 2, size = 0.2)+
geom_vline(xintercept = c(-0.5,0.5), col = "gray20", linetype = 2, size = 0.2)+
geom_vline(xintercept = 0, col = "gray20", linetype = 1, size = 0.5)+
scale_fill_manual(values = cols$BC)+
scale_x_continuous(breaks = seq(-6,6,by = 1))
g_hist

ggsave(filename = paste0(IO$panels, "pill_transition_TB_change_cat_x_BC.pdf"),
plot = g,
width = viz$full_width/2,
height = 0.4*viz$full_width/2,
scale = viz$scale)
ggsave(filename = paste0(IO$panels, "pill_transition_hist_log2_TB_ratio_x_BC.pdf"),
plot = g_hist,
width = viz$full_width/2,
height = 0.33*viz$full_width/2,
scale = viz$scale)
load(file = paste0(IO$out_Rdata,"days_selected_users_for_pill_transition_examples.Rdata"), verbose = TRUE)
## Loading objects:
## selected_users_days
selected_users_days$user_id = selected_users_days$stretch_id
stretch_ids = unique(selected_users_days$stretch_id)
for(stretch_id in stretch_ids){
cat("\t\t",stretch_id,"\n")
this_stretch_days = selected_users_days[selected_users_days$stretch_id == stretch_id,]
trans = unique(this_stretch_days$transition)
g = ggplot_user_history(this_stretch_days, pill_transition = trans, print_x_lab = FALSE, print_title = FALSE, make_x_axis_symmetrical = TRUE)
print(g)
ggsave(filename = paste0(IO$panels, "profiles_pill_transition_example_",stretch_id,".pdf"),
plot = g,
width = viz$full_width/2,
height = 0.18*viz$full_width/2,
scale = viz$scale*1.2)
}
## user_1

## user_2

## user_3

## user_4

## user_5

## user_6

## user_7

## user_8

## user_9

## user_10
